import json
import datetime
import os
from collections import Counter
import csv
# Hashtags whose scraped snapshots are compared. Entries are normalised just
# below (lower-cased, '#' removed), so the missing '#' on a couple of them is
# harmless.
ht = [
    '#LGBTQHistory', '#HorrorToK', '#ProblemSolved', '#SkateLife',
    '#ThriftShop', '#SpookyTreats', '#tiktokfood', '#interiordesign',
    '#sfxmakeup', '#ComingOfAge', '#falldiy', '#worldseries', '#gaminglife',
    '#meleaving', '#yellow', '#yougotthis', '#mycostume', '#mypfp',
    '#welldone', '#nativefamily', '#happyhalloween', '#OhNo', '#homeoffice',
    '#myrecommendation', '#myhobby', '#fanedit', '#stemlife',
    '#motivationmonday', '#wip', '#veteransday', '#holidaycountdown',
    '#onhold', '#bekind', '#WeekendVibes', '#growupwithme',
    '#holidaysourway', '#tabletop', '#RoomTour', '#bakingszn', '#nbadraft',
    '#nonuancenovember', '#theatrekids', '#gamingsetup', '#carsoftiktok',
    '#needtoknow', '#whenwewereyounger', '#halloweenishere', '#ourtype',
    '#fallguysmoments', '#workingathome', '#onlinedating', '#familyrecipe',
    '#givingthanks', '#readysetshop', '#diceroll', '#holidayvibes',
    '#coldweather', '#holidaytiktok', '#recordsday', '#inkdrawing',
    '#happyholidays', '#homecooked', '#rnbvibes', '#easydiy', '#wildanimals',
    '#neonshadow', 'graphicdesign', '#youwantmore', '#familyimpression',
    '#givingszn', '#watchmegrow', '#whereilive', 'foodtiktok',
    '#selfimprovement', '#holidaymusic', '#howbizarre', '#ImAGhost',
    '#personalfinance',
]
ht = [h.lower().replace('#', '') for h in ht]

# Default locations of the scraped snapshots and of the generated reports.
TIKTOK_DIR = 'D:\\Work\\Tool\\tiktok\\TikToks\\'
COMPARE_DIR = 'D:\\Work\\Tool\\tiktok\\compare\\'

# Header row of every generated TSV report.
REPORT_HEADER = ['filename', 'unique ids', 'in previous not in this',
                 'in this not in previous', 'is in trending',
                 'average digg count', 'average share count',
                 'average play count', 'average comment count']

# Per-video counters that are averaged, in report column order.
STAT_FIELDS = ('diggCount', 'shareCount', 'playCount', 'commentCount')


def _is_trending(name):
    """Return True when the 4th '_'-separated field of *name* marks a trend.

    The field (after dropping '.json') counts as trending when it is 't' or
    an integer greater than zero. Names with fewer than four fields, or with
    a non-numeric field other than 't', are treated as not trending instead
    of aborting the whole run.
    """
    parts = name.replace('.json', '').split('_')
    if len(parts) <= 3:
        return False
    flag = parts[3]
    if flag == 't':
        return True
    try:
        return int(flag) > 0
    except ValueError:
        return False


def _load_snapshot(path):
    """Read one JSON-lines snapshot and return ``(ids, averages)``.

    ``ids`` is the set of video ids found. ``averages`` lists the mean of
    each STAT_FIELDS counter, or 'NA' for each when the snapshot holds no
    usable record (avoids dividing by zero).

    Two record layouts are handled: id at the top level with counters under
    'stats', or id and counters together under 'itemInfos'. Records with
    neither key are ignored.
    """
    ids = set()
    totals = [0] * len(STAT_FIELDS)
    with open(path, 'r', encoding='utf-8', newline='\n') as snapshot:
        for line in snapshot:
            if not line.strip():
                # Blank lines are not valid JSON documents; skip them.
                continue
            record = json.loads(line)
            if 'id' in record:
                video_id, stats = record['id'], record['stats']
            elif 'itemInfos' in record:
                stats = record['itemInfos']
                video_id = stats['id']
            else:
                continue
            ids.add(video_id)
            for k, field in enumerate(STAT_FIELDS):
                totals[k] += int(stats[field])
    if not ids:
        return ids, ['NA'] * len(STAT_FIELDS)
    # NOTE(review): totals include repeated records of the same id while the
    # divisor is the number of unique ids (kept as in the original script) --
    # confirm this is the intended definition of "average".
    return ids, [total / len(ids) for total in totals]


def main(hashtags=None, input_dir=TIKTOK_DIR, output_dir=COMPARE_DIR):
    """Write one ``<hashtag>.tsv`` report per hashtag into *output_dir*.

    For every hashtag, the files in *input_dir* whose lower-cased name
    contains the hashtag are processed in case-insensitive name order. Each
    file yields one row: unique id count, ids lost and gained relative to
    the previous file ('NA' for the first), the trending flag and the
    per-counter averages. A hashtag without any matching file produces a
    report containing only the header.

    hashtags defaults to the module-level ``ht`` list.
    """
    if hashtags is None:
        hashtags = ht
    # os.listdir returns names in arbitrary order; sort once so that
    # "previous" is well defined, and avoid re-listing per hashtag.
    names = sorted(os.listdir(input_dir), key=str.lower)
    for hashtag in hashtags:
        print(hashtag)
        # NOTE(review): substring match, so a short tag such as 'wip' also
        # selects any file whose name merely contains it -- confirm intended.
        datalist = [name for name in names if hashtag in name.lower()]
        report_path = os.path.join(output_dir, hashtag + '.tsv')
        with open(report_path, 'w', encoding='utf-8', newline='\n') as filename_output:
            writer = csv.writer(filename_output, delimiter='\t')
            writer.writerow(REPORT_HEADER)
            previous_name = None
            previous_ids = None
            for name in datalist:
                ids, averages = _load_snapshot(os.path.join(input_dir, name))
                if previous_ids is None:
                    dropped = added = 'NA'
                else:
                    dropped = len(previous_ids - ids)
                    added = len(ids - previous_ids)
                    print('In ' + previous_name + ' Not in ' + name + ': ', dropped)
                    print('In ' + name + ' Not in ' + previous_name + ': ', added)
                    print('Unique Ids in ' + previous_name + ':', len(previous_ids))
                    print('Unique Ids in ' + name + ':', len(ids))
                writer.writerow([name, len(ids), dropped, added,
                                 _is_trending(name)] + averages)
                previous_name, previous_ids = name, ids


if __name__ == '__main__':
    main()

    
